package com.example.readworldfile.controller;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.example.readworldfile.wordUtil.WordRead;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

/**
 * REST endpoints that read a Word document and return its content.
 *
 * <p>Every handler returns a map holding the extracted content under the
 * {@code "content"} key. When reading fails, the failure is logged and the map is
 * returned without that key, so callers must treat a missing key as "could not read".
 *
 * <p>NOTE(review): the original header also mentioned PDF files, but no PDF handler
 * is visible in this class.
 */
@RequestMapping("/auth/readFile/")
@RestController
public class ReadFileController {

    private static final Logger LOG = Logger.getLogger(ReadFileController.class.getName());

    /**
     * Path of the Word file that every endpoint reads.
     *
     * <p>This is a hard-coded development location. The original author's notes describe
     * the intended alternative: keep the file under {@code src/main/webapp/} and resolve it
     * against the web application root, e.g.
     * {@code request.getSession().getServletContext().getRealPath("/") + "template/wp.docx"}.
     */
    private static final String WORD_FILE_PATH = "D:/test.docx";

    /** File suffix that selects the legacy binary (.doc) extractor. */
    private static final String LEGACY_DOC_SUFFIX = ".doc";

    /** Key under which the extracted content is stored in every response map. */
    private static final String CONTENT_KEY = "content";

    /**
     * Reads the plain, unformatted text of the Word document (.doc or .docx).
     *
     * <p>A path ending in {@code .doc} (case-insensitive) is read with the legacy
     * {@link WordExtractor}; any other path is treated as .docx.
     *
     * @param request  the current HTTP request (not used)
     * @param response the current HTTP response (not used)
     * @return a map with the document text under {@code "content"}, or an empty map when
     *         the file could not be read
     */
    @RequestMapping(value = "/readWordFile")
    public Map<String, Object> readWordFile(HttpServletRequest request, HttpServletResponse response) {
        Map<String, Object> result = new HashMap<String, Object>();
        String filePath = WORD_FILE_PATH;
        // try-with-resources guarantees the file handle is released on every path,
        // including when POI rejects the file half-way through parsing.
        try (FileInputStream in = new FileInputStream(new File(filePath))) {
            String text = hasLegacyDocSuffix(filePath) ? extractDocText(in) : extractDocxText(in);
            result.put(CONTENT_KEY, text);
        } catch (Exception e) {
            // Request boundary: POI signals malformed files with both checked and unchecked
            // exceptions, so everything is caught here and reported instead of propagated.
            LOG.log(Level.SEVERE, "Failed to read Word file " + filePath, e);
        }
        return result;
    }

    /**
     * Reads the Word document (.docx) and returns it converted to HTML.
     *
     * <p>The conversion is delegated to {@code WordRead.readWordToHtml}; per the original
     * author's notes the converted text content includes tables.
     *
     * @param request  the current HTTP request (not used)
     * @param response the current HTTP response (not used)
     * @return a map with the generated HTML under {@code "content"}, or an empty map when
     *         the conversion failed
     */
    @RequestMapping(value = "/readWordFile2")
    public Map<String, Object> readWordFile2(HttpServletRequest request, HttpServletResponse response) {
        Map<String, Object> result = new HashMap<String, Object>();
        String filePath = WORD_FILE_PATH;
        try {
            result.put(CONTENT_KEY, WordRead.readWordToHtml(filePath));
        } catch (Exception e) {
            LOG.log(Level.SEVERE, "Failed to convert Word file to HTML: " + filePath, e);
        }
        return result;
    }

    /**
     * Reads the .docx document and returns it converted to HTML, including images.
     *
     * <p>The conversion is delegated to {@code WordRead.readWordImgToHtml}; per the original
     * author's notes it covers text, pictures and tables, with the markup assembled manually.
     *
     * @param request  the current HTTP request (not used)
     * @param response the current HTTP response (not used)
     * @return a map with the generated HTML under {@code "content"}, or an empty map when
     *         the conversion failed
     */
    @RequestMapping(value = "/readWordFile3")
    public Map<String, Object> readWordFile3(HttpServletRequest request, HttpServletResponse response) {
        Map<String, Object> result = new HashMap<>();
        String filePath = WORD_FILE_PATH;
        try {
            result.put(CONTENT_KEY, WordRead.readWordImgToHtml(filePath));
        } catch (Exception e) {
            LOG.log(Level.SEVERE, "Failed to convert Word file (with images) to HTML: " + filePath, e);
        }
        return result;
    }

    /**
     * Tells whether {@code path} ends with the legacy {@code .doc} suffix, ignoring case.
     * A path shorter than the suffix yields {@code false} rather than an exception.
     */
    private static boolean hasLegacyDocSuffix(String path) {
        int suffixLength = LEGACY_DOC_SUFFIX.length();
        // regionMatches returns false for a negative offset, which covers short paths.
        return path.regionMatches(true, path.length() - suffixLength, LEGACY_DOC_SUFFIX, 0, suffixLength);
    }

    /** Extracts the text of a legacy .doc stream and closes the extractor afterwards. */
    private static String extractDocText(InputStream in) throws IOException {
        try (WordExtractor extractor = new WordExtractor(in)) {
            return extractor.getText();
        }
    }

    /**
     * Extracts the text of a .docx stream and closes the extractor afterwards.
     * Only the extractor is closed explicitly, as in the original code.
     */
    private static String extractDocxText(InputStream in) throws IOException {
        try (XWPFWordExtractor extractor = new XWPFWordExtractor(new XWPFDocument(in))) {
            return extractor.getText();
        }
    }
}

